/*=======================================================================
Creator: Jingyuan Wang, jingyuanwang@u.northwestern.edu
Date created: 				08/12/2018
Date last modified: 		
Purpose: 

1. Import all the OCEW raw data
2. Clean 
	2.1 keep only state level NAICS31-33 observations
	2.2 make sure we have all the NAICS6 industries we need, if not, keep NAICS5 industries


==========================================================================*/


*************************************************************************
* 				PART A. Import data							 			*
*************************************************************************
clear all

* 1. import price
* import 
import delimited using "$SEDSraw/pr_all.csv", varn(1) clear 
foreach v of varlist v4-v50 {
              local label : variable label `v'
              rename `v' price`label'
}
*
* reshape to long
drop data_status
reshape long price, i(state msn) j(year)

* clean the msn variable
gen energy_type = substr(msn, 1, 2)
gen energy_activities = substr(msn, 3, 2)
gen data_type = substr(msn, 5, 1)

tab data_type
drop data_type msn

* save
tempfile price
save `price.dta', replace



* 2. import expenditure
import delimited using "$SEDSraw/ex_all.csv", varn(1) clear 
foreach v of varlist v4-v50 {
              local label : variable label `v'
              rename `v' expenditure`label'
}
* reshape to long
drop data_status
reshape long expenditure, i(state msn) j(year)

* clean the msn variable
gen energy_type = substr(msn, 1, 2)
gen energy_activities = substr(msn, 3, 2)
gen data_type = substr(msn, 5, 1)

tab data_type
drop if data_type == "S"
drop data_type msn

* save
tempfile expenditure
save `expenditure.dta', replace


* 3. merge
merge 1:1 state energy_type energy_activities year using `price.dta'
drop _merge


* 4. label variables
order state year energy_type energy_activities price expenditure
label var energy_type "Type of energy"
label var energy_activities "Energy activity or energy-consuming sector"
label var price "price in dollars per million Btu"
label var expenditure "expenditure in million dollars"


save "$SEDS/raw/priceexpenditure_state_year_fuel.dta", replace


*************************************************************************
* 				PART B. clean dataset 				 					*
*************************************************************************

use "$SEDS/raw/priceexpenditure_state_year_fuel.dta", clear

* keep industrial sector only
keep if energy_activities == "IC"
* I have checked that energy_activities == "OC" and energy_activities == "IS" is not what we want
drop energy_activities

save "$SEDS/industrialpriceexpenditure_state_year_fuel.dta", replace
